* Project      :  The Full, Persistent, and Symmetric Pass-Through of a Temporary VAT Cut
* Authors      :  Márcia Silva-Pereira, João Quelhas, Tiago Bernardino, Ricardo Duque Gabriel
* Date         :  01/04/2025
* Description  :  Dataset Construction
* Dependencies :
* Modifications: (add date, author and change)

*********************************************************
*               Construct the main dataset              *
*********************************************************

// Initialize an empty dataset with the same structure as the data files.
quietly use "${path_source}/SDP_DPROD_202301202312_AUCHAN_${extraction}_FB_V01.dta", clear

keep if _n == 0

// Generate a new variable to hold the supermarket name.
gen supermarket = ""

** 2021

foreach store in "AUCHAN" "MINIPRECO" "PINGODOCE" {

	use "${path_source}/SDP_DPROD_202107202112_`store'_${extraction}_FB_V01.dta", clear

	unskizpanel id date
	
	gen supermarket = "`store'"
	
	save "${path_work}/auxiliar/tempfile_2021_`store'.dta", replace

	}

foreach store in "CONTINENTE" {
	
	use "${path_source}/SDP_DPROD_202109202112_`store'_${extraction}_FB_V01.dta", clear

	unskizpanel id date
	
	gen supermarket = "`store'"
	
	save "${path_work}/auxiliar/tempfile_2021_`store'.dta", replace

	}

** 2022

foreach store in "AUCHAN" "CONTINENTE" "MINIPRECO" "PINGODOCE" {
	
 	use "${path_source}/SDP_DPROD_202201202212_`store'_${extraction}_FB_V01.dta", clear

	unskizpanel id date
	
	gen supermarket = "`store'"
	
	save "${path_work}/auxiliar/tempfile_2022_`store'.dta", replace

	}

** 2023

foreach store in "AUCHAN" "CONTINENTE" "MINIPRECO" "PINGODOCE" {

 	use "${path_source}/SDP_DPROD_202301202312_`store'_${extraction}_FB_V01.dta", clear

	unskizpanel id date
	
	gen supermarket = "`store'"
	
	save "${path_work}/auxiliar/tempfile_2023_`store'.dta", replace

	}

** 2024

foreach store in "AUCHAN" "CONTINENTE" "MINIPRECO" "PINGODOCE" {

 	use "${path_source}/SDP_DPROD_202401202412_`store'_${extraction}_FB_V01.dta", clear

	unskizpanel id date
	
	gen supermarket = "`store'"
	
	save "${path_work}/auxiliar/tempfile_2024_`store'.dta", replace

	}

// Merge all temp files created for each supermarket.
use "${path_work}/auxiliar/tempfile_2021_AUCHAN.dta", clear
append using "${path_work}/auxiliar/tempfile_2021_CONTINENTE.dta" 
append using "${path_work}/auxiliar/tempfile_2021_MINIPRECO.dta"
append using "${path_work}/auxiliar/tempfile_2021_PINGODOCE.dta"

append using "${path_work}/auxiliar/tempfile_2022_AUCHAN.dta" 
append using "${path_work}/auxiliar/tempfile_2022_CONTINENTE.dta" 
append using "${path_work}/auxiliar/tempfile_2022_MINIPRECO.dta"
append using "${path_work}/auxiliar/tempfile_2022_PINGODOCE.dta"

append using "${path_work}/auxiliar/tempfile_2023_AUCHAN.dta" 
append using "${path_work}/auxiliar/tempfile_2023_CONTINENTE.dta" 
append using "${path_work}/auxiliar/tempfile_2023_MINIPRECO.dta"
append using "${path_work}/auxiliar/tempfile_2023_PINGODOCE.dta"

append using "${path_work}/auxiliar/tempfile_2024_AUCHAN.dta" 
append using "${path_work}/auxiliar/tempfile_2024_CONTINENTE.dta" 
append using "${path_work}/auxiliar/tempfile_2024_MINIPRECO.dta"
append using "${path_work}/auxiliar/tempfile_2024_PINGODOCE.dta"

// Sort the data.
sort id date

// Drop the data from March 2024, not needed.
drop if date >= td(01/03/2024)

// Save the updated dataset used to work onwards.
keep id name brand date supermarket price ean13 regular_price ppu ecoicop
order id ean13 date name brand supermarket price regular_price ppu ecoicop

// Save the working dataset.
save "${path_work}/auxiliar/working_dataset.dta", replace

erase "${path_work}/auxiliar/tempfile_2021_AUCHAN.dta" 
erase "${path_work}/auxiliar/tempfile_2021_CONTINENTE.dta" 
erase "${path_work}/auxiliar/tempfile_2021_MINIPRECO.dta"
erase "${path_work}/auxiliar/tempfile_2021_PINGODOCE.dta"

erase "${path_work}/auxiliar/tempfile_2022_AUCHAN.dta" 
erase "${path_work}/auxiliar/tempfile_2022_CONTINENTE.dta" 
erase "${path_work}/auxiliar/tempfile_2022_MINIPRECO.dta"
erase "${path_work}/auxiliar/tempfile_2022_PINGODOCE.dta"

erase "${path_work}/auxiliar/tempfile_2023_AUCHAN.dta" 
erase "${path_work}/auxiliar/tempfile_2023_CONTINENTE.dta" 
erase "${path_work}/auxiliar/tempfile_2023_MINIPRECO.dta"
erase "${path_work}/auxiliar/tempfile_2023_PINGODOCE.dta"

erase "${path_work}/auxiliar/tempfile_2024_AUCHAN.dta" 
erase "${path_work}/auxiliar/tempfile_2024_CONTINENTE.dta" 
erase "${path_work}/auxiliar/tempfile_2024_MINIPRECO.dta"
erase "${path_work}/auxiliar/tempfile_2024_PINGODOCE.dta"

*********************************************************
*          Identify the goods with zero VAT             *
*********************************************************

// Initialize an empty dataset with the same structure as the data files.
quietly use "${path_source}/SDP_LISTPRODIVA0_AUCHAN_${extraction}_V01.dta", clear

keep if _n == 0

// Generate a new variable to hold the supermarket name.
gen supermarket = ""

foreach store in "AUCHAN" "CONTINENTE" "MINIPRECO" "PINGODOCE" {
	
	local filename "${path_source}/SDP_LISTPRODIVA0_`store'_${extraction}_V01.dta"
		
	append using "`filename'"
	
	replace supermarket = "`store'" if supermarket == ""

	}

// Save the current dataset to a file named "merged_zero_vat_list.dta".
save "${path_work}/auxiliar/merged_zero_vat_list.dta", replace

*********************************************************
*              Combine the two datasets                 *
*********************************************************

// Load the dataset named "working_dataset.dta" with a list of goods and zero VAT.
use "${path_work}/auxiliar/working_dataset.dta", clear

// Merge the datasets based on the product ID and SUPERMARKET, using the "merged_zero_vat_list.dta".
merge m:1 id supermarket using "${path_work}/auxiliar/merged_zero_vat_list.dta"

// Drop rows from zero VAT dataset where the merge resulted in no match (i.e., _merge == 2).
drop if _merge == 2

// Generate a variable 'treatment' based on the merge status (3 indicates a match).
gen treatment = _merge == 3

// Define labels for 'treatment' variable (0 as "control" and 1 as "treated").
label define lbl_treatment 0 "control" 1 "treated", replace

// Apply the defined labels to the 'treatment' variable.
label values treatment lbl_treatment

// Drop the merge status variable '_merge'.
drop _merge

// Group id and supermarket.
egen id_sm = group(id supermarket name)

// Set the data as a panel with daily observations.
xtset id_sm date, daily

// Save the updated dataset to a file named "combined_dataset.dta".
save "${path_work}/auxiliar/combined_dataset.dta", replace

*********************************************************

erase "${path_work}/auxiliar/working_dataset.dta"

*********************************************************
